################################################################################
# SIMDATANORM, this is the function that generates the data for random effects
# that are normally distributed (see simdata for uniformally distributed random effects).
# The basic file came from Therneau, comments and sample objects by SDB May/June 2002/August
################################################################################
# nn gives the sample size, maxev gives the maximum number of events per subject,
#maxt gives the maximum follow up time, p gives the proportion of subjects that
#receive treatment, rxe is the treatment effect (beta), covar is a function to
#generate the random effect (heterogeneity, comes from a uniform distribution,
#see covar.R), times gives the time to next event (the function klambda or klambda2, 
#for example), rho is autocorrelation parameter, which set to 1.0 for independence, min and maxi
#give the range of the uniform distribution for the draws in covar.

simdatanorm <- function(nn, maxev, maxt, p, rxe, base, covar, times, rho=1, mean=0, std=1) {
# set.seed(1)
    n    <- floor(nn/2) #largest integer <= nn/2. 
    xx   <- covarnorm(nn,mean,std) #This says give the function covar nn as the 
#number of covariates to return.
    rx   <- c(rep(1,n), rep(0,n)) #This combines into a row a set of 1s 
#repeated n times and a set of 0s repeated n times.  
    risk <- xx #Risk is the vector of random effects created by covar
    risk[1:n] <- risk[1:n] + rep(rxe, round(p*n)) #Replace risk for rows 1 to n with 
#what's there (random effect) plus rxe repeated for p*n, this adds 
#the treatment effect (rxe) to the risk for the portion of the 
#sample (p*n) that receives treatment.
    rxgrp <- c(rep(1:length(p), round(p*n)), rep(0,n)) #combine into a row: repeat 1 to 
# # of p-rows p*n times combined with 0 repeated n times

gapstart<-rep(0,nn)
    risk <- exp(risk) * base #risk is replaced with exp of risk * baseline risk

##########################################################################################
# times = function to return a matrix of inter-event times
# times(risk, k,rho), where risk[1:n] is the relative hazard of a subject
# See klambda.R, which generates the functions klambda and klambda2, which may be passed to simdatanorm.R
###########################################################################################

    temp <- times(risk,maxev,rho) 
#temp:
#numeric matrix: 10 rows, 6 columns. 
#           [,1]       [,2]      [,3]      [,4]       [,5]         [,6] 
#[1,]  0.3371361  6.7539995 0.9556816  5.122449 13.5268065 13.606789456
#[2,]  0.8753686  0.5676004 0.2992863  3.328514  3.2698339  0.006568544
#[3,] 21.9678298  4.0214176 0.8315382  8.481424  2.7373305 16.308966673
#[4,]  0.1460226  1.5973235 2.3641998  2.865536  0.8511547  5.007198953
#[5,]  1.4435692 30.7964420 8.6353346  8.334783 97.9373983 29.609612090
#[6,]  3.0869505  8.1448012 3.9846210 14.806031  2.8327831 18.215745656

    temp2<- apply(temp,1,cumsum)      #stop times: cumsum rows of temp -- compute 
#cumulative sum of rows, gives total time up through that row (so cumulates times up 
#to the rowth event

#temp2:
#numeric matrix: 6 rows, 10 columns. 
#           [,1]      [,2]     [,3]       [,4]       [,5]     [,6]      [,7]       [,8]      [,9]     [,10] 
#[1,]  0.3371361 0.8753686 21.96783  0.1460226   1.443569  3.08695  3.233358 0.03331137  1.218662 0.4992932
#[2,]  7.0911356 1.4429691 25.98925  1.7433460  32.240011 11.23175  5.065101 0.06497192  1.412654 0.6150286
#[3,]  8.0468172 1.7422554 26.82079  4.1075459  40.875346 15.21637  5.912239 0.46202892  5.195028 2.9984436
#[4,] 13.1692664 5.0707695 35.30221  6.9730821  49.210129 30.02240 13.503198 0.47158836  6.869221 4.1879503
#[5,] 26.6960730 8.3406034 38.03954  7.8242368 147.147527 32.85519 17.213809 0.63743144  8.265133 6.0967420
#[6,] 40.3028624 8.3471720 54.34851 12.8314358 176.757140 51.07093 21.311200 0.64220118 15.629982 6.4542524


    temp3<- rbind(0, temp2[-maxev,])  #start times: This puts 0 as first start time in 
#all the rows and gets rid of row with the cum time to last event
#temp3:
#numeric matrix: 6 rows, 10 columns. 
#           [,1]      [,2]     [,3]      [,4]       [,5]     [,6]      [,7]       [,8]     [,9]     [,10] 
#[1,]  0.0000000 0.0000000  0.00000 0.0000000   0.000000  0.00000  0.000000 0.00000000 0.000000 0.0000000
#[2,]  0.3371361 0.8753686 21.96783 0.1460226   1.443569  3.08695  3.233358 0.03331137 1.218662 0.4992932
#[3,]  7.0911356 1.4429691 25.98925 1.7433460  32.240011 11.23175  5.065101 0.06497192 1.412654 0.6150286
#[4,]  8.0468172 1.7422554 26.82079 4.1075459  40.875346 15.21637  5.912239 0.46202892 5.195028 2.9984436
#[5,] 13.1692664 5.0707695 35.30221 6.9730821  49.210129 30.02240 13.503198 0.47158836 6.869221 4.1879503
#[6,] 26.6960730 8.3406034 38.03954 7.8242368 147.147527 32.85519 17.213809 0.63743144 8.265133 6.0967420

    keep <- (temp3 <maxt) #keep is true or false, true if temp3 is <maxt, 
#maxt is set to 4 in Therneau's sample 
#keep:
#     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] 
#[1,]    T    T    T    T    T    T    T    T    T     T
#[2,]    T    T    F    T    T    T    T    T    T     T
#[3,]    F    T    F    T    F    F    F    T    T     T
#[4,]    F    T    F    F    F    F    F    T    F     T
#[5,]    F    F    F    F    F    F    F    T    F     F
#[6,]    F    F    F    F    F    F    F    T    F     F

    stat <- 1*(temp2 <maxt) #stat is 1 if temp2 cell is < maxt, otherwise 0 
#(so it is false 1 row sooner than temp3 is false
#This must help get start and stop times right.
#stat:
#     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] 
#[1,]    1    1    0    1    1    1    1    1    1     1
#[2,]    0    1    0    1    0    0    0    1    1     1
#[3,]    0    1    0    0    0    0    0    1    0     1
#[4,]    0    0    0    0    0    0    0    1    0     0
#[5,]    0    0    0    0    0    0    0    1    0     0
#[6,]    0    0    0    0    0    0    0    1    0     0


    temp2 <- ifelse(temp2<maxt, temp2, maxt) #This changes temp2 so that it is temp2 
#if cell is less than maxt and maxt otherwise

#(newtemp2)
#          [,1]      [,2] [,3]      [,4]     [,5]    [,6]     [,7]       [,8]     [,9]     [,10] 
#[1,] 0.3371361 0.8753686    4 0.1460226 1.443569 3.08695 3.233358 0.03331137 1.218662 0.4992932
#[2,] 4.0000000 1.4429691    4 1.7433460 4.000000 4.00000 4.000000 0.06497192 1.412654 0.6150286
#[3,] 4.0000000 1.7422554    4 4.0000000 4.000000 4.00000 4.000000 0.46202892 4.000000 2.9984436
#[4,] 4.0000000 4.0000000    4 4.0000000 4.000000 4.00000 4.000000 0.47158836 4.000000 4.0000000
#[5,] 4.0000000 4.0000000    4 4.0000000 4.000000 4.00000 4.000000 0.63743144 4.000000 4.0000000
#[6,] 4.0000000 4.0000000    4 4.0000000 4.000000 4.00000 4.000000 0.64220118 4.000000 4.0000000

#I added these 3 lines to Therneau's program to derive stop times in gap time.
start2<- c(temp3[keep])  #takes values in temp3 (By column) if keep is true and calls it start
stop2<- c(temp2[keep])  #takes values in temp2 (by column) if keep is true and calls it stop
gapstop<-stop2-start2
#return(gapstop)

#first create enum0 - enum-1
#then if stop is maxt then strata is enum-2, unless enum is 0, in which case, keep it 0

enum0<-(rep(1:maxev-1,nn))[keep]
strata<-ifelse(enum0==0,0,ifelse(stop2<maxt,enum0,ifelse(stop2==maxt & enum0==1,enum0,enum0-1)))
#looky<-cbind(start2,stop2,gapstop)

#return(temp2,temp3,keep,gapstop,enum0,strata,looky)




#td creates a data.frame containing id, start, stop, status,rx,x,enum, and rxgrp
#The id column contains: repeat maxev nn-times, for that number of times, 
#repeat 1:nn, I don't know what the [keep] is. 
#The start column: combines temp3 if keep is true?
#stop combines temp2 if keep is true?
#status ???
#rx repeats maxev nn-times, for that number of times repeat rx 
#(which is 0s and 1s)?, again, don't know what the [keep] is.
#x same thing, except xx is repeated rather than rx.
#enum repeats 1 to maxev, nn times (plus [keep] thing).
#rxgrp repeats rxgrp for the repeats of maxev nn-times, (plus [keep] thing).

    td <- data.frame(id    = (rep(1:nn,rep(maxev,nn)))[keep],
               start = c(temp3[keep]),  #takes values in temp3 (By column) if keep is true and calls it start
               stop  = c(temp2[keep]),  #takes values in temp2 (by column) if keep is true and calls it stop
               status = stat[keep],
               rx    = (rep(rx, rep(maxev,nn)))[keep],
               x     = (rep(xx, rep(maxev,nn)))[keep], #this is the random effect
               enum  = (rep(1:maxev, nn))[keep],
rxgrp = (rep(rxgrp, rep(maxev,nn)))[keep],
gapstart = (rep(gapstart, rep(maxev,nn)))[keep],
gapstop = c(gapstop),
strata = c(strata))


    # add interaction terms to the data.frame td
    for (i in 1:maxev) #for i = 1 to maxev
        td[[paste('rx', i, sep='')]] <- td$rx * (td$enum ==i) #pastes char rx to i, (without a space) and
#put in this column whatever is in rx*true(1), if enum is equal to i
#so this is 1 if they had event i

    if (maxev > 2) { #if case had more than 2 events,
        for (i in 2:(maxev-1)) #then for i from 2 to maxev-1
            td[[paste('rx',i,'p', sep='')]] <- td$rx * (td$enum >=i) #pastes rx to i to p, no spaces, and puts in this column
#rx*true(1) if enum is greater than or equal to i
        }
    td
#write.table(td, "td.txt", sep = ",",T) 
#export.data(td, "td.txt",STATA) 
    }

#td
#   id      start       stop status rx          x enum rxgrp rx1 rx2 rx3 rx4 rx5 rx6 rx2p rx3p rx4p rx5p 
# 1  1 0.00000000 0.33713614      1  1 -1.3487696    1     1   1   0   0   0   0   0    0    0    0    0
# 2  1 0.33713614 4.00000000      0  1 -1.3487696    2     1   0   1   0   0   0   0    1    0    0    0
# 3  2 0.00000000 0.87536864      1  1 -0.3009412    1     1   1   0   0   0   0   0    0    0    0    0
# 4  2 0.87536864 1.44296908      1  1 -0.3009412    2     1   0   1   0   0   0   0    1    0    0    0
# 5  2 1.44296908 1.74225538      1  1 -0.3009412    3     1   0   0   1   0   0   0    1    1    0    0
# 6  2 1.74225538 4.00000000      0  1 -0.3009412    4     1   0   0   0   1   0   0    1    1    1    0
# 7  3 0.00000000 4.00000000      0  1 -0.7337485    1     1   1   0   0   0   0   0    0    0    0    0
# 8  4 0.00000000 0.14602255      1  1  0.5858178    1     1   1   0   0   0   0   0    0    0    0    0
# 9  4 0.14602255 1.74334603      1  1  0.5858178    2     1   0   1   0   0   0   0    1    0    0    0
#10  4 1.74334603 4.00000000      0  1  0.5858178    3     1   0   0   1   0   0   0    1    1    0    0
#11  5 0.00000000 1.44356919      1  1 -1.6648505    1     1   1   0   0   0   0   0    0    0    0    0
#12  5 1.44356919 4.00000000      0  1 -1.6648505    2     1   0   1   0   0   0   0    1    0    0    0
#13  6 0.00000000 3.08695046      1  0 -1.6693917    1     0   0   0   0   0   0   0    0    0    0    0
#14  6 3.08695046 4.00000000      0  0 -1.6693917    2     0   0   0   0   0   0   0    0    0    0    0
#15  7 0.00000000 3.23335777      1  0 -1.1877509    1     0   0   0   0   0   0   0    0    0    0    0
#16  7 3.23335777 4.00000000      0  0 -1.1877509    2     0   0   0   0   0   0   0    0    0    0    0
#   id      start       stop status rx          x enum rxgrp rx1 rx2 rx3 rx4 rx5 rx6 rx2p rx3p rx4p rx5p 
#17  8 0.00000000 0.03331137      1  0  1.9118737    1     0   0   0   0   0   0   0    0    0    0    0
#18  8 0.03331137 0.06497192      1  0  1.9118737    2     0   0   0   0   0   0   0    0    0    0    0
#19  8 0.06497192 0.46202892      1  0  1.9118737    3     0   0   0   0   0   0   0    0    0    0    0
#20  8 0.46202892 0.47158836      1  0  1.9118737    4     0   0   0   0   0   0   0    0    0    0    0
#21  8 0.47158836 0.63743144      1  0  1.9118737    5     0   0   0   0   0   0   0    0    0    0    0
#22  8 0.63743144 0.64220118      1  0  1.9118737    6     0   0   0   0   0   0   0    0    0    0    0
#23  9 0.00000000 1.21866204      1  0 -0.2451871    1     0   0   0   0   0   0   0    0    0    0    0
#24  9 1.21866204 1.41265386      1  0 -0.2451871    2     0   0   0   0   0   0   0    0    0    0    0
#25  9 1.41265386 4.00000000      0  0 -0.2451871    3     0   0   0   0   0   0   0    0    0    0    0
#26 10 0.00000000 0.49929317      1  0 -0.9120608    1     0   0   0   0   0   0   0    0    0    0    0
#27 10 0.49929317 0.61502862      1  0 -0.9120608    2     0   0   0   0   0   0   0    0    0    0    0
#28 10 0.61502862 2.99844363      1  0 -0.9120608    3     0   0   0   0   0   0   0    0    0    0    0
#29 10 2.99844363 4.00000000      0  0 -0.9120608    4     0   0   0   0   0   0   0    0    0    0    0




